{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Dumping model to file cache C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\jieba.cache\n",
      "Loading model cost 0.820 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "分词结果: 2022年的北京冬奥会是第24届冬季奥林匹克运动会\n"
     ]
    }
   ],
   "source": [
    "import jieba\n",
    "import os\n",
    "from nltk.parse import stanford\n",
    "string='2022年的北京冬奥会是第24届冬季奥林匹克运动会'\n",
    "seg_list=jieba.cut(string ,cut_all=False,HMM=True)\n",
    "seg_str=''.join(seg_list)\n",
    "print('分词结果:',seg_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.environ.get('JAVA_HOME'):\n",
    "    JAVA_HOME = 'C:Program Files\\Java\\jdk-21'\n",
    "    os.environ['JAVA_HOME'] = JAVA_HOME"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: DeprecationWarning: The StanfordParser will be deprecated\n",
      "Please use \u001b[91mnltk.parse.corenlp.StanforCoreNLPParser\u001b[0m instead.\n",
      "  after removing the cwd from sys.path.\n"
     ]
    },
    {
     "ename": "LookupError",
     "evalue": "\n\n===========================================================================\nNLTK was unable to find the java file!\nUse software specific configuration paramaters or set the JAVAHOME environment variable.\n===========================================================================",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mLookupError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-4-aab2c9ee7c13>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mpcfg_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'edu/stanford/nlp/models/lexparser/shinesePCFG.ser.gz'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstanford\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mStanfordParser\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath_to_jar\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mparser_path\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpath_to_models_jar\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmodel_path\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpcfg_path\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0msentence\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraw_parse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseg_str\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\parse\\stanford.py\u001b[0m in \u001b[0;36mraw_parse\u001b[1;34m(self, sentence, verbose)\u001b[0m\n\u001b[0;32m    132\u001b[0m         \u001b[1;33m:\u001b[0m\u001b[0mrtype\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0miter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mTree\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    133\u001b[0m         \"\"\"\n\u001b[1;32m--> 134\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mnext\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraw_parse_sents\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0msentence\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    135\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    136\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mraw_parse_sents\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msentences\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\parse\\stanford.py\u001b[0m in \u001b[0;36mraw_parse_sents\u001b[1;34m(self, sentences, verbose)\u001b[0m\n\u001b[0;32m    150\u001b[0m             \u001b[1;34m'-outputFormat'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_OUTPUT_FORMAT\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    151\u001b[0m         ]\n\u001b[1;32m--> 152\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_parse_trees_output\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_execute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'\\n'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    153\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    154\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mtagged_parse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msentence\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\parse\\stanford.py\u001b[0m in \u001b[0;36m_execute\u001b[1;34m(self, cmd, input_, verbose)\u001b[0m\n\u001b[0;32m    198\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    199\u001b[0m         \u001b[1;31m# Configure java.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 200\u001b[1;33m         \u001b[0mconfig_java\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjava_options\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    201\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    202\u001b[0m         \u001b[1;31m# Windows is incompatible with NamedTemporaryFile() without passing in delete=False.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\__init__.py\u001b[0m in \u001b[0;36mconfig_java\u001b[1;34m(bin, options, verbose)\u001b[0m\n\u001b[0;32m     56\u001b[0m     \"\"\"\n\u001b[0;32m     57\u001b[0m     \u001b[1;32mglobal\u001b[0m \u001b[0m_java_bin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_java_options\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 58\u001b[1;33m     \u001b[0m_java_bin\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfind_binary\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'java'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbin\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0menv_vars\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'JAVAHOME'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'JAVA_HOME'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinary_names\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'java.exe'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     60\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0moptions\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\__init__.py\u001b[0m in \u001b[0;36mfind_binary\u001b[1;34m(name, path_to_bin, env_vars, searchpath, binary_names, url, verbose)\u001b[0m\n\u001b[0;32m    602\u001b[0m                 binary_names=None, url=None, verbose=False):\n\u001b[0;32m    603\u001b[0m     return next(find_binary_iter(name, path_to_bin, env_vars, searchpath,\n\u001b[1;32m--> 604\u001b[1;33m                                  binary_names, url, verbose))\n\u001b[0m\u001b[0;32m    605\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    606\u001b[0m def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\__init__.py\u001b[0m in \u001b[0;36mfind_binary_iter\u001b[1;34m(name, path_to_bin, env_vars, searchpath, binary_names, url, verbose)\u001b[0m\n\u001b[0;32m    596\u001b[0m     \"\"\"\n\u001b[0;32m    597\u001b[0m     for file in  find_file_iter(path_to_bin or name, env_vars, searchpath, binary_names,\n\u001b[1;32m--> 598\u001b[1;33m                      url, verbose):\n\u001b[0m\u001b[0;32m    599\u001b[0m         \u001b[1;32myield\u001b[0m \u001b[0mfile\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    600\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\nltk\\__init__.py\u001b[0m in \u001b[0;36mfind_file_iter\u001b[1;34m(filename, env_vars, searchpath, file_names, url, verbose, finding_dir)\u001b[0m\n\u001b[0;32m    567\u001b[0m                         (filename, url))\n\u001b[0;32m    568\u001b[0m         \u001b[0mdiv\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'='\u001b[0m\u001b[1;33m*\u001b[0m\u001b[1;36m75\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 569\u001b[1;33m         \u001b[1;32mraise\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\n\\n%s\\n%s\\n%s'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mdiv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdiv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    570\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    571\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mLookupError\u001b[0m: \n\n===========================================================================\nNLTK was unable to find the java file!\nUse software specific configuration paramaters or set the JAVAHOME environment variable.\n==========================================================================="
     ]
    }
   ],
   "source": [
    "parser_path = 'stanford-parser.jar'\n",
    "model_path = 'stanford-parser-4.2.0-models.jar'\n",
    "pcfg_path='edu/stanford/nlp/models/lexparser/shinesePCFG.ser.gz'\n",
    "parser=stanford.StanfordParser(path_to_jar=parser_path,path_to_models_jar=model_path,model_path=pcfg_path)\n",
    "sentence=parser.raw_parse(seg_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for line in sentence:\n",
    "    print('句法分析的结果:\\n',line)\n",
    "    print('句法树的叶子节点:\\n',line.levaes())\n",
    "    line.deaw()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
